# ------------------------------------------------------
# ` generate Figures based on estimates model results
# ------------------------------------------------------
load_packages = c('rio','here','dplyr','data.table','fst',
	'drake','moments','igraph','Matrix','car','lfe','expss','survey',
	'RStata','stargazer','lfe')

invisible(lapply(load_packages, library, character.only = TRUE))

library(ggExtra)
library(ggplot2)
library(ggridges)
library(hrbrthemes)
library(gtable)
library(ggpubr)

# ------ specify directory
data_path = ""
project_path = ""

setwd(data_path)

# ------ run some codes to produce processed data sets 
source(file.path(project_path,'rdata_util.R'))

# ---- depression indicators
all_dep_indicators = c("s60i","s60j","s60k","s60l","s60m","s60n")
    #s60i : did you have trouble eating, or a poor appetite? $$$
    #s60j : did you have trouble falling asleep or staying asleep?  $$$
    #s60k : did you feel depressed or blue?  $$$
    #s60l : did you have trouble relaxing? 
    #s60m : were you moody? 
    #s60n : did you cry a lot?  $$$
    # [[ 0 : never, 1:rarely, 2:occasionally, 3:often, 4:everyday, 9:multiple response ]]

control.varlist = c('female','white','black','hispanic','other',
	'immig_1st','immig_2nd','immig_3rd','family_two','family_one','family_other','pa_educ')
    ind_controls = c('female','black','hispanic','other','immig_1st','immig_2nd',
      'family_step','family_one','family_other','pvt','pa_educ','assistance','sibsize')
    peer_controls = c(paste0(c('female','black','hispanic','other','immig_1st','immig_2nd',
      'family_one','family_other','pa_educ'),'.mean'))

# read data
raw_data_inschool = impute_aid_inschool(data.table(import(file.path(data_path,'rawdata','inschool.dta'))))

# process depression data
processed_data_depress = process_depress(raw_data_inschool, dep_indicators=all_dep_indicators)

m_filter = 'nofilter'
m_reference = 'all'

#------------------------------------------------------------------------------
# Figure 1 : distribution of depressive scores across grades within selected schools
#==============================================================================
processed_data_depress[,scid_grade := paste0(scid,":",grade)]
processed_data_depress[,N_grade := sum(!is.na(depress)), by=c('scid','grade')]
processed_data_depress[,grade := as.factor(grade)]

data_for_plot = processed_data_depress[!is.na(depress)&!is.na(scid)&!is.na(grade)&scid %in% c(87,267,33),]
data_for_plot[,group := factor(scid,levels=c(87,267,33),labels=c('School A','School B','School C'))]
data_for_plot = data_for_plot[grade!='8',]
data_for_plot[,grade := as.factor(grade)]

g <- ggplot(data_for_plot, 
			aes(x=depress, y=grade, fill=factor(..quantile..))) +
  stat_density_ridges(aes(y=grade),
    geom = "density_ridges_gradient",
  	quantile_lines = TRUE, calc_ecdf = TRUE, quantiles = c(0.05, 0.5, 0.95)) +
  #scale_fill_brewer(palette="OrRd",
  #  name = "Probability", 
  #  labels = c("[0, 0.05]", "(0.05, 0.5]", "(0.5,0.95]", "(0.95, 1]")  )+
scale_fill_manual(
  name = "Probability", values = c('white','gray','darkgray','black'), 
  labels = c("[0, 0.05]", "(0.05, 0.5]", "(0.5,0.95]", "(0.95, 1]")  ) +
	labs(y="Grade",x='Depressive Symptom Scale') +
  theme_pubr() + xlim(c(-0.7,4.5))+
  theme(
  		legend.position = 'top',
  		axis.text.y=element_text(vjust=-1.5)) +
  facet_wrap(~group, ncol=3)

ggsave(file.path(project_path,'output','distribution_grade.png'),g, width=13, height=8, scale=0.6)

#------------------------------------------------------------------------------
# Figure 2.  bivariate analysis
#==============================================================================
reg_data = data.table(import(file.path(data_path,'processed','reg_main_grade_all_include_self.dta')))
ind_controls = c('female','black','hispanic','other','immig_1st','immig_2nd',
  'family_step','family_one','family_other','pvt','pa_educ','assistance','sibsize')
peer_controls = c(paste0(c('female','black','hispanic','other','immig_1st','immig_2nd',
  'family_one','family_other','pa_educ'),'.mean'))

# generate the analytic sample
reg_data[!is.na(gswgt2) & !is.na(gswgt1),out_sample := 0L]
# reg_data[out_sample == 0L & is.na(sqid), out_sample := 1L]
  reg_data[out_sample == 0L & (is.na(peer_depress_median)|is.na(peer_depress_q05)|is.na(peer_depress_q95)), out_sample := 2L]
  reg_data[out_sample == 0L & (is.na(fcesd_2)), out_sample := 3L]
  reg_data[out_sample == 0L & (is.na(depress)), out_sample := 5L]
  reg_data[out_sample == 0L & (is.na(parent_attachment)), out_sample := 6L]
  reg_data[out_sample == 0L & !complete.cases(reg_data[,..ind_controls]), out_sample := 8L]

# compute the mean for each bin
compute_group = function(data, x,y,bins = 10) {
    bins     <- min(floor(nrow(data)/10), bins)
    x_bin    <- ggplot2::cut_interval(data[[x]], bins)
    x_means  <- stats::ave(data[[x]], x_bin, FUN = mean)
    y_means  <- stats::ave(data[[y]], x_bin, FUN = mean)
    y_se     <- stats::ave(data[[x]], x_bin, FUN = sd)
    y_obs    <- stats::ave(data[[y]], x_bin, FUN = length)
    result   <- data.table(
        x_bin=x_bin,
        x    = x_means, 
        y    = y_means, 
        ymax = y_means + 1.96*y_se/sqrt(y_obs),
        ymin = y_means - 1.96*y_se/sqrt(y_obs),
        obs = y_obs)
    result   <- na.omit(unique(result))
    setnames(result,'x','peer_measure')
    setnames(result,'y',y)
    setnames(result,'ymax',paste0(y,"_max"))
    setnames(result,'ymin',paste0(y,"_min"))
    return(result)
}

binned_reg_q95 = compute_group(reg_data[out_sample ==0,], 'peer_depress_q95', 'fcesd_2', bins=50)
binned_reg_q05 = compute_group(reg_data[out_sample ==0,], 'peer_depress_q05', 'fcesd_2', bins=50)
binned_reg_median = compute_group(reg_data[out_sample ==0,], 'peer_depress_median', 'fcesd_2', bins=50)
binned_reg_mean = compute_group(reg_data[out_sample ==0,], 'peer_depress_mean', 'fcesd_2', bins=50)

binned_reg = rbind(data.table(binned_reg_q95, measure='peer_depress_q95'),
  data.table(binned_reg_q05,measure='peer_depress_q05'),
  data.table(binned_reg_median,measure='peer_depress_median'))

binned_reg[,measure := factor(measure,
  levels=c('peer_depress_q05','peer_depress_median','peer_depress_q95'),
  labels=c('P(Nondepressed peers : Bottom 5)','Peer Depression Median','P(Highly depressed peers: Top 5)'))]

g <- binned_reg %>%
    ggplot(mapping = aes(x = peer_measure, y = fcesd_2, size=obs)) +
    geom_point(alpha=0.3) +
    geom_smooth(method="lm", se=T,color='black') +
    theme_pubr() + 
    labs(
      y="CESD (Wave 2)", 
     x="") +
    theme(legend.position = "none") +
    facet_wrap(~measure,ncol=3, scale='free_x',switch='x')+
    theme(strip.placement = "outside")+
    theme(axis.title.x = element_blank(),
        strip.background = element_blank())

ggsave(file.path(project_path,'output','binned_corr_with_wave2.png'),g,width=8,height=4)
  
# ----
# Figure 3 interaction effects
# ----
output = list()
output[[1]] = data.table(attachment='low', q95='low',coef=10.8 ,ymin=9.8,ymax=11.8)
output[[2]] = data.table(attachment='low', q95='high',coef=14.0,ymin=12.9 ,ymax=15.1)
output[[3]] = data.table(attachment='high',q95='low',coef=9.3,ymin=8.8,ymax=9.9)
output[[4]] = data.table(attachment='high',q95='high',coef=10.2,ymin=9.5,ymax=10.8)
output = rbindlist(output)

output[,attachment := factor(attachment,levels=c('low','high'))]
output[,q95 := factor(q95,levels=c('low','high'))]

g <- ggplot(output, aes(x=attachment,y=coef,ymin=ymin,ymax=ymax,fill=q95))+
  geom_col(stat='identity',color='black',position=position_dodge(),alpha=0.8) + 
  geom_errorbar(width=.2,position=position_dodge(.9)) +
  scale_fill_manual(
    name = "Percent of highly depressive peers", values = c('white','black'), 
    labels = c("low","high")) +
  labs(y="CESD (Wave 2)", 
       x="Parent-Child Attachment") +
  theme_pubr() +
  theme(legend.position = 'top')

ggsave(file.path(project_path,'output','interact_pattachment.png'),g, width=5, height=5)

# ----
# interaction effects (in response to reviewers) at the different pattachment values
# ----
output = list()
output[[1]] = data.table(attachment='low', q95='low',coef= 11.3 ,ymin= 10.1 ,ymax=12.5)
output[[2]] = data.table(attachment='low', q95='high',coef=14.5 ,ymin=13.1 ,ymax= 15.9)
output[[3]] = data.table(attachment='high',q95='low',coef=9.4,ymin=8.9,ymax= 9.9)
output[[4]] = data.table(attachment='high',q95='high',coef= 10.4,ymin=9.8 ,ymax= 11.0)
output = rbindlist(output)

output[,attachment := factor(attachment,levels=c('low','high'))]
output[,q95 := factor(q95,levels=c('low','high'))]

g <- ggplot(output, aes(x=attachment,y=coef,ymin=ymin,ymax=ymax,fill=q95))+
geom_col(stat='identity',color='black',position=position_dodge(),alpha=0.8) +
  geom_errorbar(width=.2,position=position_dodge(.9)) +
  scale_fill_manual(
    name = "Percent of highly depressive peers", values = c('white','black'), 
    labels = c("low","high")) +
  labs(y="CESD (Wave 2)", 
       x="Parent Child Attachment") +
  theme_pubr() +
  theme(legend.position = 'top')

ggsave(file.path(project_path,'output','interact_pattachment_alternative.png'),g, width=5, height=5)

#------------------------------------------------------------------------------
# Figure R1. overall distribution of depressive symptopms
# in responses to reviewers 
#==============================================================================
# requires the following R codes
source(file.path(project_path,'DEP_geom_marginal_boxplot.R'))

processed_data_depress[, `:=`(
        qt05 = quantile(depress,0.05,type = 7, na.rm=TRUE),
        qt10 = quantile(depress,0.10,type = 7, na.rm=TRUE),
        qt50 = quantile(depress,0.50,type = 7, na.rm=TRUE),
        qt90 = quantile(depress,0.90,type = 7, na.rm=TRUE),
        qt95 = quantile(depress,0.95,type = 7, na.rm=TRUE)
        )]  

processed_data_depress[depress <= qt05, depress_qt := 'Bottom 10%']
processed_data_depress[qt10 < depress & depress <= qt50, depress_qt := 'From 10% to 90%']
processed_data_depress[qt50 < depress & depress <= qt90, depress_qt := 'From 10% to 90%']
processed_data_depress[qt90 < depress & depress <= qt95, depress_qt := 'Top 10%']
processed_data_depress[qt95 < depress , depress_qt := 'Top 5%']

g = ggplot(processed_data_depress[!is.na(grade),], aes(x=depress, fill=depress_qt)) +
    geom_histogram(aes(y = (..count..)/sum(..count..)),bins=25, color='black',alpha=0.2) + 
    scale_y_continuous(labels = scales::percent) +
    theme_pubr() + 
    geom_marginboxplot(data=processed_data_depress[!is.na(grade),],aes(x=depress,y= 1), color='black',fill='black',sides = "t") +
    labs(x='Depressive Symptom Scale', y='Percentage', fill='',color='')
ggsave(file.path(project_path,'output','overall_histogram_box.png'),g, width=9, height=7, scale=0.6)

#------------------------------------------------------------------------------
# Figure R2. distribution of parent attachment 
# in responses to reviewers 
#==============================================================================

reg_data = data.table(import(file.path(data_path,'processed','reg_main_grade_all_include_self.dta')))
ind_controls = c('female','black','hispanic','other','immig_1st','immig_2nd',
  'family_step','family_one','family_other','pvt','pa_educ','assistance','sibsize')
peer_controls = c(paste0(c('female','black','hispanic','other','immig_1st','immig_2nd',
  'family_one','family_other','pa_educ'),'.mean'))

# generate the analytic sample
reg_data[!is.na(gswgt2) & !is.na(gswgt1),out_sample := 0L]
  reg_data[out_sample == 0L & (is.na(peer_depress_median)|is.na(peer_depress_q05)|is.na(peer_depress_q95)), out_sample := 2L]
  reg_data[out_sample == 0L & (is.na(fcesd_2)), out_sample := 3L]
  reg_data[out_sample == 0L & (is.na(depress)), out_sample := 5L]
  reg_data[out_sample == 0L & (is.na(parent_attachment)), out_sample := 6L]
  reg_data[out_sample == 0L & !complete.cases(reg_data[,..ind_controls]), out_sample := 8L]

fig_parent = ggplot(reg_data[out_sample==0,], aes(x=parent_attachment)) +
  geom_bar(aes(y = (..count..)/sum(..count..)),fill='black', alpha=0.9) + 
  theme_pubr() +
  labs(x='Parent-Child Attachment',
    y='Percentage')
ggsave(file.path(project_path,'output','pattachment_distribution.png'),fig_parent,width=4,height=4)

# -----
# random falsification tests
# -----
model_fe_grade = read_fst(file.path(data_path, 'processed','simulation_all','sim_grade_model_coef.fst'),as.data.table=TRUE)

names(model_fe_grade)[3:8] = c('coef','se','t','p','lci','uci')
model_fe_grade[,significant := as.factor(ifelse(p < 0.05,'sig','not-sig.'))]

model_fe_grade[,coef_name := factor(coef_name, 
    levels=c('peer_depress_q95','peer_depress_q05','peer_depress_median'))]

mean(model_fe_grade[coef_name == 'peer_depress_q95',lci]>0)
mean(model_fe_grade[coef_name == 'peer_depress_q95',uci]<0)
mean(model_fe_grade[coef_name == 'peer_depress_q05',uci]<0)
mean(model_fe_grade[coef_name == 'peer_depress_q95',coef] > 0.116)

model_fe_grade[, quantile(coef, probs=c(.025, 0.975)),by='coef_name']

model_fe_grade[,tail := 'normal']
model_fe_grade[coef_name == 'peer_depress_q05' & coef > 0.071, tail := 'sig']
model_fe_grade[coef_name == 'peer_depress_q05' & coef < -0.071, tail := 'sig']

model_fe_grade[coef_name == 'peer_depress_q95' & coef < -0.093, tail := 'sig']
model_fe_grade[coef_name == 'peer_depress_q95' & coef > 0.099, tail := 'sig']

model_fe_grade[coef_name == 'peer_depress_median' & coef < -1.87, tail := 'sig']
model_fe_grade[coef_name == 'peer_depress_median' & coef > 1.813, tail := 'sig']


pdf(file.path(project_path,'output','falsification_random_peer.pdf'),width=8,height=4)
layout(matrix(1:3,1,3))
  hist(model_fe_grade[coef_name =='peer_depress_q05',coef], breaks=30,
      ylab='Frequency',xlab='Coefficients from 1000 Simulations',main='Percent of nondepressed peers')
  hist(model_fe_grade[coef_name =='peer_depress_q05' & tail =='sig',coef], breaks=30,add=TRUE,col='gray')
  abline(v=-0.010, col='black',lwd=2,lty=2)
  
  hist(model_fe_grade[coef_name =='peer_depress_median',coef], breaks=30,
      ylab='Frequency',xlab='Coefficients from 1000 Simulations',main='Peer Depression Median')
  hist(model_fe_grade[coef_name =='peer_depress_median' & tail =='sig',coef], breaks=30,add=TRUE,col='gray')
  abline(v=-0.199, col='black',lwd=2,lty=2)
  
  
  hist(model_fe_grade[coef_name =='peer_depress_q95',coef], breaks=30,
      ylab='Frequency',xlab='Coefficients from 1000 Simulations',main='Percent of highly depressed peers', xlim=c(-0.2,0.2))
  hist(model_fe_grade[coef_name =='peer_depress_q95' & tail =='sig',coef], breaks=30,add=TRUE,col='gray')
  abline(v=0.157, col='black',lwd=2,lty=2)
dev.off()

